Allow error codes, such as 404, to be considered successful, potential solution for #1272

George Opritescu 9 年之前
父节点
当前提交
6232189688
共有 2 个文件被更改,包括 79 次插入1 次删除
  1. 30 1
      app/models/agents/website_agent.rb
  2. 49 0
      spec/models/agents/website_agent_spec.rb

+ 30 - 1
app/models/agents/website_agent.rb

@@ -103,6 +103,8 @@ module Agents
103 103
 
104 104
       Set `unzip` to `gzip` to inflate the resource using gzip.
105 105
 
106
+      Set `consider_http_error_success` to an array of integer HTTP status codes, e.g. `[404]`, to treat responses with those statuses as successful and scrape them as well.
107
+
106 108
       # Liquid Templating
107 109
 
108 110
       In Liquid templating, the following variable is available:
@@ -149,6 +151,7 @@ module Agents
149 151
       errors.add(:base, "either url, url_from_event, or data_from_event are required") unless options['url'].present? || options['url_from_event'].present? || options['data_from_event'].present?
150 152
       errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
151 153
       validate_extract_options!
154
+      validate_consider_http_success_option!
152 155
 
153 156
       # Check for optional fields
154 157
       if options['mode'].present?
@@ -166,6 +169,27 @@ module Agents
166 169
       validate_web_request_options!
167 170
     end
168 171
 
172
# Validates the optional `consider_http_error_success` option.
#
# When present, the option must be a non-empty Array of unique Integer
# status codes. Each violation adds one message to `errors` under :base.
# When the value is valid it is cached in @error_codes_considered_success.
def validate_consider_http_success_option!
  consider_success = options["consider_http_error_success"]
  # The option is optional; nil means it was not configured at all.
  return if consider_success.nil?

  if !consider_success.is_a?(Array) || consider_success.empty?
    # An empty list is reported here, since this message is the one that
    # asks for "at least one status code".
    errors.add(:base, "Must be an array and specify at least one status code")
  elsif consider_success.uniq.count != consider_success.count
    errors.add(:base, "Duplicate http code found")
  elsif !consider_success.all? { |code| code.is_a?(Integer) }
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # removed in Ruby 3.2, where referencing it raises NameError.
    errors.add(:base, "Please make sure to use only integer values for code")
  else
    @error_codes_considered_success = consider_success
  end
end
192
+
169 193
     def validate_extract_options!
170 194
       extraction_type = (extraction_type() rescue extraction_type(options))
171 195
       case extract = options['extract']
@@ -273,7 +297,7 @@ module Agents
273 297
       uri = Utils.normalize_uri(url)
274 298
       log "Fetching #{uri}"
275 299
       response = faraday.get(uri)
276
-      raise "Failed: #{response.inspect}" unless response.success?
300
+      raise "Failed: #{response.inspect}" unless consider_response_successful?(response)
277 301
 
278 302
       interpolation_context.stack {
279 303
         interpolation_context['_response_'] = ResponseDrop.new(response)
@@ -353,6 +377,11 @@ module Agents
353 377
     end
354 378
 
355 379
     private
380
# Decides whether a fetched response should be processed instead of raising.
#
# A response counts as successful when it reports success itself, or when
# its status is listed in the `consider_http_error_success` option.
#
# The codes are read from `options` on every call rather than from
# @error_codes_considered_success: that instance variable is only assigned
# while validate_consider_http_success_option! runs, so an instance on which
# validation has not run would otherwise ignore the option.
#
# @param response [#success?, #status] the HTTP response to inspect
# @return [Boolean] true when the response should be treated as successful
def consider_response_successful?(response)
  return true if response.success?

  accepted_codes = options["consider_http_error_success"]
  # Guard against a missing or malformed (non-Array) option value.
  accepted_codes.is_a?(Array) && accepted_codes.include?(response.status)
end
356 385
 
357 386
     def handle_event_data(data, event, existing_payload)
358 387
       handle_data(data, event.payload['url'], existing_payload)

+ 49 - 0
spec/models/agents/website_agent_spec.rb

@@ -40,6 +40,23 @@ describe Agents::WebsiteAgent do
40 40
         expect(@checker).not_to be_valid
41 41
       end
42 42
 
43
it 'should validate the consider_http_error_success fields' do
  # A list holding a single integer status code is the accepted form.
  @checker.options['consider_http_error_success'] = [404]
  expect(@checker).to be_valid

  # Rejected, in order: duplicate codes, a float, a non-numeric string,
  # and an empty list.
  [[404, 404], [404.0], ["not_a_code"], []].each do |rejected_value|
    @checker.options['consider_http_error_success'] = rejected_value
    expect(@checker).not_to be_valid
  end
end
59
+
43 60
       it "should validate uniqueness_look_back" do
44 61
         @checker.options['uniqueness_look_back'] = "nonsense"
45 62
         expect(@checker).not_to be_valid
@@ -169,6 +186,38 @@ describe Agents::WebsiteAgent do
169 186
       end
170 187
     end
171 188
 
189
describe 'consider_http_error_success' do
  # Stubs a gzip-encoded JSON body served with status 404 and checks that an
  # agent configured to accept 404 still extracts a value from it.
  it 'should allow scraping from a 404 result' do
    json = {
      'response' => {
        'version' => 2,
        'title' => "hello!"
      }
    }
    zipped = ActiveSupport::Gzip.compress(json.to_json)
    stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
    site = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => "2",
      'type' => "json",
      'url' => "http://gzip.com",
      'mode' => 'on_change',
      # String key with a hash rocket, like every other entry: the
      # `'key': value` form creates a Symbol key, while the agent reads this
      # option by its String name.
      'consider_http_error_success' => [404],
      'extract' => {
        'version' => { 'path' => 'response.version' },
      },
      # no unzip option
    }
    checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
    checker.user = users(:bob)
    checker.save!

    checker.check
    event = Event.last
    expect(event.payload['version']).to eq(2)
  end
end
220
+
172 221
     describe 'unzipping' do
173 222
       it 'should unzip automatically if the response has Content-Encoding: gzip' do
174 223
         json = {